#Set up

#s1
#NOTE(review): wiping the workspace and hard-coding a user-specific working
#directory make this script non-portable; both are kept because every relative
#path below is resolved against this directory.
rm(list = ls())

setwd("C:/Users/cecidy/Google Drive/ZH_analysis/")

#Helper functions used further down (presumably NrOfZeroALL, AddConstant,
#MakeMPI00, getDataFrame - confirm) are expected to come from this file
source("ZHfunctions.R")

#Strongly penalise scientific notation when numbers are printed
options(scipen = 9999)

#Read the Zero Hunger data set that all samples below are built from
ZeroHunger <- read.csv(
  "Data/ZHdf/ZeroHungerDF_2020.csv",
  header = TRUE,
  sep = ","
)

#s2
#############################################################################################################

#start1_

#ZH and Natural vegetation 04 to 13 robustness sample

###############################################################################################################

#1. Prepare each dataframe

#Get rural municipalities (PopDensity20042004Analysis of at most 150) and
#don't include the very large ones (SizeKm22004Analysis above 10000)
Poverty04 <- subset(
  ZeroHunger,
  PopDensity20042004Analysis <= 150 & SizeKm22004Analysis <= 10000
)

#Get list of vars
#NOTE(review): the column selection below relies on these .rda files defining
#objects called BasicNatVeg and MPI00List - confirm against the saved files
load(file = "Data/ForSamples/MPI00list.rda")
load(file = "Data/ForSamples/BasicNatVeg.rda")

#Keep the ZH per-capita variable, the BasicNatVeg and MPI00List variables and the biome
RuralPoverty04 <- subset(
  Poverty04,
  select = c(
    "ZHPerCapTotal04to13for2004Analysis1000",
    BasicNatVeg,
    MPI00List,
    "Biomefor2004AnalysisNoHybrid"
  )
)#4985

#Drop every row that has a missing value in any of the selected columns
RuralPoverty04 <- na.omit(RuralPoverty04)

#Then take away the ones affected by rural credit
load(file = "Data/ForSamples/EcludeForRCProb04.rda")

RuralPoverty04 <- subset(RuralPoverty04, !(IBGECode7digit %in% EcludeForRCProb04))

#Then take away those that have a boundary change 2000-2004
Mun00analysis <- read.csv(
  "Data/ForSamples/ListMunicipalitiesChanged00to10Analysis.csv",
  header = TRUE,
  sep = ","
)

#Creation years 2005 and 2009 are not needed - those are already dealt with in the 2004 data!
TakeOUt <- c("2005", "2009")

Mun00analysis <- subset(Mun00analysis, !(YearOfCreation %in% TakeOUt))
mun00list <- as.vector(Mun00analysis$IBGECode7digit)

RuralPoverty04 <- subset(RuralPoverty04, !(IBGECode7digit %in% mun00list))

#Keep only the rows where PropObsNatVeg is above 95 (share observed in land use)
RuralPoverty04 <- subset(RuralPoverty04, PropObsNatVeg > 95)

#Just copy to get the "quality" dataset
RuralPoverty04quality <- RuralPoverty04

###################################################################################################################

#Make logged variables

#First make MPI with the correct 0s
#NrOfZeroALL is defined outside this section; its first element is used as the
#MPI variables to adjust and its second element as the other variables to adjust
test <- NrOfZeroALL(RuralPoverty04quality)#This gives me all the vars with 0s
MPIadd <- test[[1]]
Otheradd <- test[[2]]

#One AddConstant result per MPI variable, bound together column-wise
WithAdded <- lapply(
  MPIadd,
  function(varName) AddConstant(RuralPoverty04quality, varName, addName = "")
)
WithAdded <- data.frame(do.call("cbind", WithAdded))

#Empty addName is assumed to keep the original column names, so the adjusted
#columns simply override the existing ones - TODO confirm in AddConstant
RuralPoverty04quality[, MPIadd] <- WithAdded[, MPIadd]#just override

#Then I need to make the MPI - here I need to use the 00 function
RuralPoverty04quality <- MakeMPI00(RuralPoverty04quality)

#Then for the others I cbind them on; because none of the previous "nocero"
#columns have been included there are no duplicates
RuralPoverty04quality <- cbind(
  RuralPoverty04quality,
  lapply(
    Otheradd,
    function(varName) AddConstant(RuralPoverty04quality, varName, addName = "nocero")
  )
)

#################################################################################################

#And finally I need to log variables - get a list of all names

#Names of the zero-adjusted ("nocero") versions of the variables in Otheradd
OtheraddCero <- paste0(Otheradd, "nocero")

#All these need log
cols.log <- c(
  "ZHPerCapTotal04to13for2004Analysis1000",
  "MPI2000for2000Analysis",
  "MPI2010for2000Analysis",
  "GDPPerCapPublicrealN2004for2004Analysis1000",
  OtheraddCero,
  "RemoteMinPopFifty2004Analysis",
  "MeanElevationfor2004analysis",
  "MeanSlopefor2004analysis",
  "Totalkm2_Coverage",
  "Sumkm2_NatVeg2004",
  "Sumkm2_NatVeg2013",
  "PopDensity20042004Analysis"
)
addlog <- function(x) log10(x)

#lapply always returns one element per column; sapply would collapse a
#one-row input into a named vector and give a wrongly shaped data frame
Log10variables <- data.frame(lapply(RuralPoverty04quality[cols.log], addlog))
colnames(Log10variables) <- paste0(colnames(Log10variables), "log10")

#log10 turns zeros into -Inf without any message; report the affected columns
#so that they are noticed before they are scaled or modelled
local({
  notFinite <- vapply(
    Log10variables,
    function(values) any(!is.finite(values)),
    logical(1)
  )
  if (any(notFinite)) {
    warning(
      "log10 produced non-finite values (NA/NaN/Inf) in: ",
      paste(names(notFinite)[notFinite], collapse = ", "),
      call. = FALSE
    )
  }
})

#Get all onto ZH
RuralPoverty04quality <- cbind(RuralPoverty04quality, Log10variables)
head(RuralPoverty04quality)

##########################################################################################################################

#Remove those I don't need
#Assigning NULL keeps the names defined but releases the data they held
Log10variables <- Poverty04 <- ZeroHunger <- TempVars <- WithAdded <- NULL

#####################################################################

#The ZH variable is passed on the log10 scale here; this may have to be changed
#after checking whether it should be logged or not!
#NOTE(review): contrastVar2 names "Biomefor2004Analysis", while the rest of this
#script works with "Biomefor2004AnalysisNoHybrid" - confirm this is intended
RuralPoverty04quality <- getDataFrame(
  RuralPoverty04quality,
  "contr.Sum",
  contrastVar1 = "stateName",
  contrastVar2 = "Biomefor2004Analysis",
  setRefToMostCommonLevel,
  BaseVar = "ZHPerCapTotal04to13for2004Analysis1000log10",
  MainName = "ZHPerCapTotal04to13for2004Analysis1000log10scaledMain"
)

#fin1_

#############################################################################################################

#start2_

#Prepare vectors of variables for regressions

###############################################################################################################

#Dependent variable
Depvar <- "Sumkm2_NatVeg2013log10"

#Main independent variable - already scaled so no need to scale it again
IndepVarLin <- "ZHPerCapTotal04to13for2004Analysis1000log10scaledMain"

#Baseline (2004) counterpart of the dependent variable, scaled in the formula
baselineDepVar <- "scale(Sumkm2_NatVeg2004log10)"

#NOTE(review): the object called stateName holds the biome column name - confirm
stateName <- "Biomefor2004AnalysisNoHybrid"

#Interaction term between the ZH variable and the biome, in formula syntax
intLin <- paste(
  "ZHPerCapTotal04to13for2004Analysis1000log10scaledMain",
  "Biomefor2004AnalysisNoHybrid",
  sep = "*"
)

#For CBPS models
cbpsDepVar <- "ZHPerCapTotal04to13for2004Analysis1000log10"
cbpsbaselineDepVar <- "Sumkm2_NatVeg2004log10"

#########################################################################################################################

#Unscaled covariate names for the CBPS models; the last two entries are the
#grouping variables (state and biome)
cbpsVars <- c(
  "MPI2000for2000Analysis",
  "GDPPerCapPublicrealN2004for2004Analysis1000log10",
  "TotalHectareCrops2004for2004Analysisnocerolog10",
  "TotalHectarePasture2006for2004Analysisnocerolog10",
  "TotalHectareFarmsLess502006for2004Analysisnocerolog10",
  "RemoteMinPopFifty2004Analysislog10",
  "ChangeCummulativeSPEI02and04to11and13for2004Analysis",
  "TotRCpcNOPRONAF04to13for2004Analysis1000nocerolog10",
  "MeanElevationfor2004analysislog10",
  "MeanSlopefor2004analysislog10",
  "PopDensity20042004Analysis",
  "Sumkm2_ProtectedArea2004nocerolog10",
  "Totalkm2_Coveragelog10",
  "stateName",
  "Biomefor2004AnalysisNoHybrid"
)

#Regression covariates: the same variables wrapped in scale(), followed by the
#state term (the biome is left out of this vector)
xvars <- c(paste0("scale(", head(cbpsVars, -2), ")"), "stateName")

#######################################################################################################################


#fin2_

#temp1


#I run ZH/PRONAF/BF robustness mpi00 mods without Amazon and Pantanal

#This because 77% of Amazon area is excluded in robustness sample and 99% of Pantanal (only 1 mun remains)

#take out Amazon and Pantanal
#The label must be spelled "Pantanal": a misspelled label matches no rows, so
#the remaining Pantanal municipality would silently stay in the sample
#NOTE(review): assumes the biome labels in the data are exactly "Amazon" and
#"Pantanal" - confirm with table(RuralPoverty04quality$Biomefor2004AnalysisNoHybrid)
RuralPoverty04quality <- subset(
  RuralPoverty04quality,
  !(Biomefor2004AnalysisNoHybrid %in% c("Amazon", "Pantanal"))
)

#temp2

################################################################################

